library(tidyverse)
library(lubridate)
#import data 
library(readr)
library(plotly)
# Load the raw park movement logs, one CSV file per day of the weekend.
# Later steps read the id, Timestamp, X and Y columns from these tables.
move_fri <- read_csv(
  file = "~/Documents/2021 Spring/SDS235/DC2-data/Movement Data/park-movement-Fri.csv"
)
move_sat <- read_csv(
  file = "~/Documents/2021 Spring/SDS235/DC2-data/Movement Data/park-movement-Sat.csv"
)
move_sun <- read_csv(
  file = "~/Documents/2021 Spring/SDS235/DC2-data/Movement Data/park-movement-Sun.csv"
)
# Friday: parse the timestamps, then tag every record with its visitor's
# earliest (entry_time) and latest (exit_time) timestamp.
# Only the records sitting at those two moments are kept, which shrinks the
# table to (normally) two rows per id. The result stays grouped by id.
time_move_fri <- move_fri %>%
  mutate(Timestamp = ymd_hms(Timestamp)) %>%
  group_by(id) %>%
  mutate(
    exit_time = max(Timestamp),
    entry_time = min(Timestamp)
  ) %>%
  filter(Timestamp %in% c(exit_time, entry_time))

# Time each visitor spent in the park on Friday, in hours, rounded to six
# decimals so that identical stays compare equal when grouped later.
# The units are fixed explicitly with difftime(): plain POSIXct subtraction
# lets R choose the units from the data, which makes the numeric value
# ambiguous and inconsistent with the Saturday and Sunday durations (hours).
duration_fri <- time_move_fri %>%
  group_by(id) %>%
  mutate(
    duration = difftime(exit_time, entry_time, units = "hours"),
    duration = round(as.numeric(duration), digits = 6)
  )

# Group size on Friday: visitors with exactly the same stay duration are
# treated as one group, and the group size is the number of distinct ids
# sharing that duration.
# Counting distinct ids (rather than rows / 2) stays correct when a visitor
# does not have exactly two rows, e.g. a single record that is both entry and
# exit, or several records tied at the entry/exit timestamp.
group_size_fri <- duration_fri %>%
  group_by(duration) %>%
  summarize(group_size = n_distinct(id)) %>%
  arrange(desc(group_size))
## `summarise()` ungrouping output (override with `.groups` argument)
# Friday arrivals: one entry time per visitor, rounded to the nearest
# 10 minutes, then counted per 10-minute slot.
entry_fri <- time_move_fri %>%
  distinct(id, entry_time) %>%
  mutate(entry_time = round_date(entry_time, "10 mins")) %>%
  group_by(entry_time) %>%
  summarize(num_entry = n())

# Line chart of the number of arrivals per slot; the y axis is fixed to
# 0-1250 and the x axis is labelled every two hours.
entry_p_fri <- ggplot(data = entry_fri, mapping = aes(x = entry_time, y = num_entry)) +
  geom_line(color = "#000066") +
  scale_x_datetime(date_breaks = "2 hours", date_labels = "%a %I %p") +
  coord_cartesian(ylim = c(0, 1250)) +
  labs(x = "Entry Time on Friday") +
  theme_minimal()

# Friday departures: one exit time per visitor, rounded to the nearest
# 10 minutes so visitors can be counted per slot.
# Note: the count column keeps the name num_entry even though it holds exits.
exit_fri <- time_move_fri %>%
  distinct(id, exit_time) %>%
  mutate(exit_time = round_date(exit_time, "10 mins")) %>%
  group_by(exit_time) %>%
  summarize(num_entry = n())

# Line chart of the number of departures per slot; the y axis is fixed to
# 0-750 and the x axis is labelled every two hours.
exit_p_fri <- ggplot(data = exit_fri, mapping = aes(x = exit_time, y = num_entry)) +
  geom_line(color = "#000066") +
  scale_x_datetime(date_breaks = "2 hours", date_labels = "%a %I %p") +
  coord_cartesian(ylim = c(0, 750)) +
  labs(x = "Exit Time on Friday") +
  theme_minimal()


# Scatterplot of stay duration against the number of visitors sharing that
# duration on Friday. The y scale is limited to 0-45.
group_size_p_fri <- ggplot(data = group_size_fri, mapping = aes(x = duration, y = group_size)) +
  geom_point(color = "#000066") +
  ylim(0, 45) +
  labs(title = "Scatterplot of Duration And Group Size on Friday") +
  theme_minimal()
# Saturday: same preparation as for Friday.
# Step 1: parse timestamps and attach each visitor's last (exit_time) and
# first (entry_time) timestamp to all of their records.
time_move_sat <- move_sat %>%
  mutate(Timestamp = ymd_hms(Timestamp)) %>%
  group_by(id) %>%
  mutate(
    exit_time = max(Timestamp),
    entry_time = min(Timestamp)
  )

# Step 2: keep only the records at the entry or exit moment. The table stays
# grouped by id, so the comparison is made within each visitor.
time_move_sat <- time_move_sat %>%
  filter(Timestamp %in% c(exit_time, entry_time))

# Time each visitor spent in the park on Saturday: exit minus entry, expressed
# in hours as a plain number and rounded to six decimals.
duration_sat <- time_move_sat %>%
  group_by(id) %>%
  mutate(
    duration = round(
      as.numeric(difftime(exit_time, entry_time, units = "hours")),
      digits = 6
    )
  )

# Group size on Saturday: visitors with exactly the same stay duration are
# treated as one group, and the group size is the number of distinct ids
# sharing that duration.
# Counting distinct ids (rather than rows / 2) stays correct when a visitor
# does not have exactly two rows, e.g. a single record that is both entry and
# exit, or several records tied at the entry/exit timestamp.
group_size_sat <- duration_sat %>%
  group_by(duration) %>%
  summarize(group_size = n_distinct(id)) %>%
  arrange(desc(group_size))
## `summarise()` ungrouping output (override with `.groups` argument)
# Saturday arrivals: one entry time per visitor, rounded to the nearest
# 10 minutes, then counted per 10-minute slot.
entry_sat <- time_move_sat %>%
  distinct(id, entry_time) %>%
  mutate(entry_time = round_date(entry_time, "10 mins")) %>%
  group_by(entry_time) %>%
  summarize(num_entry = n())

# Line chart of the number of arrivals per slot; the y axis is fixed to
# 0-1250 and the x axis is labelled every two hours.
entry_p_sat <- ggplot(data = entry_sat, mapping = aes(x = entry_time, y = num_entry)) +
  geom_line(color = "#339999") +
  scale_x_datetime(date_breaks = "2 hours", date_labels = "%a %I %p") +
  coord_cartesian(ylim = c(0, 1250)) +
  labs(x = "Entry Time on Saturday") +
  theme_minimal()

# Saturday departures: one exit time per visitor, rounded to the nearest
# 10 minutes, then counted per 10-minute slot.
# Note: the count column keeps the name num_entry even though it holds exits.
exit_sat <- time_move_sat %>%
  distinct(id, exit_time) %>%
  mutate(exit_time = round_date(exit_time, "10 mins")) %>%
  group_by(exit_time) %>%
  summarize(num_entry = n())

# Line chart of the number of departures per slot; the y axis is fixed to
# 0-750 and the x axis is labelled every two hours.
exit_p_sat <- ggplot(data = exit_sat, mapping = aes(x = exit_time, y = num_entry)) +
  geom_line(color = "#339999") +
  scale_x_datetime(date_breaks = "2 hours", date_labels = "%a %I %p") +
  coord_cartesian(ylim = c(0, 750)) +
  labs(x = "Exit Time on Saturday") +
  theme_minimal()

# Scatterplot of stay duration (hours) against the number of visitors sharing
# that duration on Saturday. The y scale is limited to 0-45.
group_size_p_sat <- ggplot(data = group_size_sat, mapping = aes(x = duration, y = group_size)) +
  geom_point(color = "#339999") +
  ylim(0, 45) +
  labs(title = "Scatterplot of Duration And Group Size on Saturday") +
  theme_minimal()
# Sunday: same preparation as for the other days.
# ymd_hms() returns NA (with a warning) for any timestamp it cannot parse.
# Those rows are dropped before grouping: a single NA would otherwise make
# min()/max() return NA for that visitor and wipe out their entry time, exit
# time and duration even though their other records are valid.
time_move_sun <- move_sun %>%
  mutate(Timestamp = ymd_hms(Timestamp)) %>%
  filter(!is.na(Timestamp)) %>%
  group_by(id) %>%
  mutate(exit_time = max(Timestamp),
         entry_time = min(Timestamp)) %>%
  # keep only each visitor's records at their entry or exit moment
  filter(Timestamp %in% c(exit_time, entry_time))
## Warning: Problem with `mutate()` input `Timestamp`.
## ℹ  1 failed to parse.
## ℹ Input `Timestamp` is `ymd_hms(Timestamp)`.
## Warning: 1 failed to parse.
# Time each visitor spent in the park on Sunday: exit minus entry, expressed
# in hours as a plain number and rounded to six decimals.
duration_sun <- time_move_sun %>%
  group_by(id) %>%
  mutate(
    duration = round(
      as.numeric(difftime(exit_time, entry_time, units = "hours")),
      digits = 6
    )
  )

# Group size on Sunday: visitors with exactly the same stay duration are
# treated as one group, and the group size is the number of distinct ids
# sharing that duration.
# Counting distinct ids (rather than rows / 2) stays correct when a visitor
# does not have exactly two rows, e.g. a single record that is both entry and
# exit, or several records tied at the entry/exit timestamp.
group_size_sun <- duration_sun %>%
  group_by(duration) %>%
  summarize(group_size = n_distinct(id)) %>%
  arrange(desc(group_size))
## `summarise()` ungrouping output (override with `.groups` argument)
# Sunday arrivals: one entry time per visitor, rounded to the nearest
# 10 minutes, then counted per 10-minute slot.
entry_sun <- time_move_sun %>%
  distinct(id, entry_time) %>%
  mutate(entry_time = round_date(entry_time, "10 mins")) %>%
  group_by(entry_time) %>%
  summarize(num_entry = n())

# Line chart of the number of arrivals per slot; the y axis is fixed to
# 0-1250 and the x axis is labelled every two hours.
entry_p_sun <- ggplot(data = entry_sun, mapping = aes(x = entry_time, y = num_entry)) +
  geom_line(color = "#FF6600") +
  scale_x_datetime(date_breaks = "2 hours", date_labels = "%a %I %p") +
  coord_cartesian(ylim = c(0, 1250)) +
  labs(x = "Entry Time on Sunday") +
  theme_minimal()

# Sunday departures: one exit time per visitor, rounded to the nearest
# 10 minutes, then counted per 10-minute slot.
# Note: the count column keeps the name num_entry even though it holds exits.
exit_sun <- time_move_sun %>%
  distinct(id, exit_time) %>%
  mutate(exit_time = round_date(exit_time, "10 mins")) %>%
  group_by(exit_time) %>%
  summarize(num_entry = n())

# Line chart of the number of departures per slot; the y axis is fixed to
# 0-750 and the x axis is labelled every two hours.
exit_p_sun <- ggplot(data = exit_sun, mapping = aes(x = exit_time, y = num_entry)) +
  geom_line(color = "#FF6600") +
  scale_x_datetime(date_breaks = "2 hours", date_labels = "%a %I %p") +
  coord_cartesian(ylim = c(0, 750)) +
  labs(x = "Exit Time on Sunday") +
  theme_minimal()

# Scatterplot of stay duration (hours) against the number of visitors sharing
# that duration on Sunday. The y scale is limited to 0-45.
group_size_p_sun <- ggplot(data = group_size_sun, mapping = aes(x = duration, y = group_size)) +
  geom_point(color = "#FF6600") +
  ylim(0, 45) +
  labs(title = "Scatterplot of Duration And Group Size on Sunday") +
  theme_minimal()

Visualizations

Number of entries and exits over the three days

library(htmltools)


# Turn the three static entry-time charts into interactive plotly widgets and
# emit them together, Friday first.
entry_plot_fri <- ggplotly(p = entry_p_fri)
entry_plot_sat <- ggplotly(p = entry_p_sat)
entry_plot_sun <- ggplotly(p = entry_p_sun)

htmltools::tagList(entry_plot_fri, entry_plot_sat, entry_plot_sun)

# Same for the three exit-time charts.
exit_plot_fri <- ggplotly(p = exit_p_fri)
exit_plot_sat <- ggplotly(p = exit_p_sat)
exit_plot_sun <- ggplotly(p = exit_p_sun)

htmltools::tagList(exit_plot_fri, exit_plot_sat, exit_plot_sun)

Group size and the time they spent in the park

# Interactive versions of the duration vs. group size scatterplots, emitted
# together in day order.
group_plotly_fri <- ggplotly(p = group_size_p_fri)
group_plotly_sat <- ggplotly(p = group_size_p_sat)
group_plotly_sun <- ggplotly(p = group_size_p_sun)

htmltools::tagList(group_plotly_fri, group_plotly_sat, group_plotly_sun)

A closer look into specific groups

# To check that visitors sharing a duration really are a group of people
# moving together, pick two specific Saturday durations (in hours) and collect
# the visitors that have them, so their movement can be inspected.
# near() compares with a small tolerance instead of exact `==`, which is
# unreliable for floating-point values; the tolerance is far below the
# six-decimal rounding of `duration`, so neighbouring durations cannot match.
group_3_list <- duration_sat %>%
  filter(near(duration, 13.493611))

group_14_list <- duration_sat %>%
  filter(near(duration, 13.360000))

# Saturday positions of every visitor in group_3_list, one facet per id.
# geom_count() sizes each point by how many records fall on that (X, Y) spot.
group_3_move <- move_sat %>%
  filter(id %in% group_3_list$id) %>%
  ggplot(mapping = aes(x = X, y = Y)) +
  geom_count(alpha = 0.5, color = "#339999") +
  facet_wrap(~ as.factor(id)) +
  labs(title = "Individual Movement in a Group of 3 on Saturday") +
  theme_minimal()
group_3_move

# Saturday positions of every visitor in group_14_list, one facet per id,
# drawn as semi-transparent points.
group_14_move <- move_sat %>%
  filter(id %in% group_14_list$id) %>%
  ggplot(mapping = aes(x = X, y = Y)) +
  geom_point(alpha = 0.5, color = "#339999") +
  facet_wrap(~ as.factor(id)) +
  labs(title = "Individual Movement in a Group of 14 on Saturday") +
  theme_minimal()
group_14_move

Citations:

time difference: https://stackoverflow.com/questions/30510044/how-to-make-time-difference-in-same-units-when-subtracting-posixct

Jordan’s demo on time series: https://jcrouser.github.io/CSC235/time-demo.html

arrange plotly objects: https://plotly-r.com/arranging-views.html#arranging-plotly-objects

Color choices: https://stackoverflow.com/questions/26195231/ggplot2-manually-specifying-colour-with-geom-line

ggplot2 geom_count in plotly: https://plotly.com/ggplot2/geom_count/